#============================================================================================================================================================================================#
##Cleaning Script for "Counterinsurgency Tactics, Rebel Grievances, and Who Keeps Fighting"
# Removes every object listed by ls() from the current environment before the script runs.
# NOTE(review): this also deletes any objects the caller had defined before running/sourcing this file.
rm(list=ls())

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#loading packages
# Install any requested packages that are not yet installed, then attach all
# requested packages.
#
# pkg: character vector of package names.
#
# Returns a logical vector, named by `pkg`, holding the result of require() for
# each package (TRUE when the package was attached, FALSE otherwise).
ipak <- function(pkg) {
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # vapply() guarantees a logical vector for every input, including an empty
  # `pkg` (where sapply() would return an empty list instead).
  vapply(pkg, require, logical(1), character.only = TRUE)
}

# Packages to install (if missing) and attach for the rest of the script
packages <- c("dplyr", "stringr")
ipak(packages)


#setwd("")

## Raw data is read from the current working directory
data <- read.csv(file = "Raw_Data.csv")

#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================PART 2: DEPENDENT VARIABLE======================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
##Dependent Variables

##Step 1A/1B: Indicator variables for service claimed / approved during Periods 4-7
# Maps each period number to the prefix shared by that period's raw columns.
period_prefix <- c(
  "4" = "April1_1918toMarch31_1919",
  "5" = "April1_1919toMarch31_1920",
  "6" = "April1_1920toMarch31_1921",
  "7" = "April1_1921toJully11_1921"
)

# Step 1A: 1 when the claimed organization for the period is anything other than "None"
for (p in names(period_prefix)) {
  claimed_org <- data[[paste0(period_prefix[[p]], "_Claimed_Organization")]]
  data[[paste0("PeriodClaim", p)]] <- as.numeric(claimed_org != "None")
}

# Step 1B: 1 when the approved entry for the period is anything other than "None"
for (p in names(period_prefix)) {
  approved <- data[[paste0(period_prefix[[p]], "_Approved")]]
  data[[paste0("PeriodApprove", p)]] <- as.numeric(approved != "None")
}

# Drop the loop temporaries so the workspace holds only the data objects
rm(period_prefix, p, claimed_org, approved)


##Dependent variable all claimed: 1 when service is claimed in every one of Periods 4-7
data$War_Independence_All_Claim <- as.numeric(
  with(data, PeriodClaim4 & PeriodClaim5 & PeriodClaim6 & PeriodClaim7)
)

##Dependent variable count: number of Periods 4-7 in which service is claimed
data$War_Independence_Count <- with(
  data,
  PeriodClaim4 + PeriodClaim5 + PeriodClaim6 + PeriodClaim7
)

##Dependent variable any claimed: 1 when service is claimed in at least one of Periods 4-7
data$War_Independence_Any_Claim <- as.numeric(
  with(data, PeriodClaim4 | PeriodClaim5 | PeriodClaim6 | PeriodClaim7)
)

##Dependent variable all approved: 1 when service is approved in every one of Periods 4-7
data$War_Independence_All_Approve <- as.numeric(
  with(data, PeriodApprove4 & PeriodApprove5 & PeriodApprove6 & PeriodApprove7)
)

##Dependent variable any approved: 1 when service is approved in at least one of Periods 4-7
data$War_Independence_Any_Approve <- as.numeric(
  with(data, PeriodApprove4 | PeriodApprove5 | PeriodApprove6 | PeriodApprove7)
)


#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================PART 4: COVARIATES==============================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#

##Age at the time of the Rising (1916 minus birth year)
data$age <- 1916 - data$birthyear

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Rank and file
##1 when the organisation 1 rank mentions "Volunteer" or "Private"; separate dummy for rank "Unknown"
data$rank_file <- as.numeric(
  grepl(pattern = "Volunteer|Private", x = data$organisation1_rank)
)
data$rank_unknown <- as.numeric(
  grepl(pattern = "Unknown", x = data$organisation1_rank)
)

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Irish Citizen Army: 1 when the Easter Week claimed organization mentions "Citizen"
data$citizen_army <- as.numeric(
  grepl(pattern = "Citizen", x = data$Easter_Week_Amount_Claimed_Organization)
)

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Dublin Brigade: 1 when the organisation 1 brigade mentions "Dublin"
data$dublin_brigade <- as.numeric(
  grepl(pattern = "Dublin", x = data$organisation1_brigade)
)

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Organization 2 officers: 1 when the organisation 2 rank mentions any of the listed officer titles
data$organisation2_officers <- as.numeric(
  grepl(
    pattern = "Officer|Commandant|General|Captain|Adjutant|Quartermaster|Lieutenant|Leader|Sergeant|Corporal|Commander",
    x = data$organisation2_rank
  )
)

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Standardising Organization 1 rank labels
# Inner call spells out "2 Lieutenant"; outer call drops the "(Organiser)" suffix from "Staff Officer".
data$organisation1_rank <- gsub(
  "Staff Officer \\(Organiser\\)", "Staff Officer",
  gsub("2 Lieutenant", "Second Lieutenant", data$organisation1_rank)
)

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Long sentence
# Typographical error in the release date ("June 1717" should read "June 1917").
# The correction is applied to year_month_released, the column the release
# dates are read from below, rather than to organisation1_rank.
data$year_month_released <- gsub("June 1717", "June 1917", data$year_month_released)
# 1 when the release date contains "17", 0 otherwise.
# NOTE(review): presumably "17" only ever appears as part of the year 1917 in
# this column -- confirm against the raw release-date format.
data$long_sentence <- as.numeric(grepl("17", data$year_month_released))

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Early join
data$early_join <- grepl("13|14", data$year_joined)
data$early_join <- as.numeric(data$early_join)

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Joined 1913
data$joined_1913 <- grepl("13", data$year_joined)
data$joined_1913 <- as.numeric(data$joined_1913)


#============================================================================================================================================================================================#
#============================================================EASTER RISING LOCATION==========================================================================================================#
#============================================================================================================================================================================================#
##Gives location of fighting during Easter Week
# Strip all punctuation, then split each entry into its newline-separated places.
data$easter_rising_location <- gsub('[[:punct:]]','', data$easter_rising_location)

easter_rising_location_list <- strsplit(data$easter_rising_location, split = "\n")

#easter week start and end locations
# Start place = first listed place, end place = last listed place.
# Entries with no places at all (e.g. an empty string) yield NA for both,
# instead of stopping the script with a zero-length replacement error.
data$easter_rising_location_startplace <- vapply(
  easter_rising_location_list,
  function(places) places[1],
  character(1)
)
data$easter_rising_location_endplace <- vapply(
  easter_rising_location_list,
  function(places) {
    if (length(places) > 0) places[length(places)] else NA_character_
  },
  character(1)
)


#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Pension Act: recode "Yes"/"No" to "1"/"0", then convert the result to numeric
data$military_service_pension_acts_1934 <- data$military_service_pension_acts_1934 %>%
  recode(Yes = "1", No = "0") %>%
  as.character() %>%
  as.numeric()


#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##Cleaning County
# Harmonises spelling variants and Irish-language county names to one English label each.
# "Wexford." is matched literally (fixed = TRUE): as a regular expression the
# unescaped "." would match any character and delete whatever follows "Wexford".
data$county <- gsub("Wexford.", "Wexford", data$county, fixed = TRUE)
# NOTE(review): "Ath Cliath" also matches inside longer strings such as
# "Baile Ath Cliath", which would leave "Baile Dublin" -- confirm against the raw values.
data$county <- gsub("Ath Cliath", "Dublin", data$county)
data$county <- gsub("Baile Átha Cliath \\(Dublin\\)", "Dublin", data$county)
data$county <- gsub("County Kildare,", "Kildare", data$county)
data$county <- gsub("Loch gCarman", "Wexford", data$county)
data$county <- gsub("Contae Na Gaillimhe", "Galway", data$county)
data$county <- gsub("Tiobrad Árann", "Tipperary", data$county)

#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================WRITING OUTPUT CSV==============================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
#============================================================================================================================================================================================#
##Writing Master Data
# Output directory hard-coded for the original author's machine. When it does
# not exist (any other machine), keep the current working directory and warn,
# rather than aborting in setwd() before the cleaned data is written.
output_dir <- "/Users/connorhuff/Dropbox/Huff_Internment/Replication2/Replication_Final"
if (dir.exists(output_dir)) {
  setwd(output_dir)
} else {
  warning(
    "Output directory not found: ", output_dir,
    "; writing MasterData.csv to ", getwd(),
    call. = FALSE
  )
}
write.csv(x = data, file = "MasterData.csv", row.names = FALSE)

